This project is trying to investigate if investment in different industry segments would impact on the amount of CO2 emission. Varibales choosed are CO2 emissions total, CO2 emissions per capita, CO2 emissions per GDP, investment in energy, investment in telecommunication, investment in transportation, investment in water.
data<-read_xls("climate_change_download_0.xls",sheet = 1,col_types = "guess",na = "..")
summary(data)
## Country code Country name Series code
## Length:13512 Length:13512 Length:13512
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Series name SCALE Decimals 1990
## Length:13512 Min. :0 Min. :0.000 Min. :-3.000e+03
## Class :character 1st Qu.:0 1st Qu.:0.000 1st Qu.: 4.000e+00
## Mode :character Median :0 Median :1.000 Median : 8.100e+01
## Mean :0 Mean :0.581 Mean : 1.698e+10
## 3rd Qu.:0 3rd Qu.:1.000 3rd Qu.: 2.704e+03
## Max. :0 Max. :1.000 Max. : 2.190e+13
## NA's :3495 NA's :3495 NA's :8658
## 1991 1992 1993
## Min. :-8.190e+02 Min. :-8.040e+02 Min. :-7.590e+02
## 1st Qu.: 4.000e+00 1st Qu.: 4.000e+00 1st Qu.: 4.000e+00
## Median : 1.000e+02 Median : 1.010e+02 Median : 1.000e+02
## Mean : 2.454e+10 Mean : 2.503e+10 Mean : 2.493e+10
## 3rd Qu.: 4.131e+03 3rd Qu.: 4.220e+03 3rd Qu.: 4.243e+03
## Max. : 2.297e+13 Max. : 2.452e+13 Max. : 2.488e+13
## NA's :10015 NA's :9859 NA's :9795
## 1994 1995 1996
## Min. :-8.500e+02 Min. :-7.950e+02 Min. :-6.800e+02
## 1st Qu.: 4.000e+00 1st Qu.: 7.000e+00 1st Qu.: 4.000e+00
## Median : 1.020e+02 Median : 1.000e+02 Median : 1.070e+02
## Mean : 2.635e+10 Mean : 2.379e+10 Mean : 3.009e+10
## 3rd Qu.: 5.409e+03 3rd Qu.: 5.863e+03 3rd Qu.: 8.394e+03
## Max. : 2.672e+13 Max. : 2.967e+13 Max. : 3.028e+13
## NA's :9733 NA's :8840 NA's :9708
## 1997 1998 1999
## Min. :-7.280e+02 Min. :-6.020e+02 Min. :-5.040e+02
## 1st Qu.: 4.000e+00 1st Qu.: 5.000e+00 1st Qu.: 6.000e+00
## Median : 1.260e+02 Median : 1.160e+02 Median : 1.020e+02
## Mean : 3.046e+10 Mean : 2.983e+10 Mean : 2.915e+10
## 3rd Qu.: 1.559e+04 3rd Qu.: 1.535e+04 3rd Qu.: 8.830e+03
## Max. : 3.019e+13 Max. : 3.008e+13 Max. : 3.121e+13
## NA's :9745 NA's :9694 NA's :9507
## 2000 2001 2002
## Min. :-5.400e+02 Min. :-6.360e+02 Min. :-8.310e+02
## 1st Qu.: 5.000e+00 1st Qu.: 5.000e+00 1st Qu.: 5.000e+00
## Median : 9.300e+01 Median : 1.030e+02 Median : 1.020e+02
## Mean : 2.196e+10 Mean : 2.995e+10 Mean : 3.078e+10
## 3rd Qu.: 3.602e+03 3rd Qu.: 1.053e+04 3rd Qu.: 7.915e+03
## Max. : 3.221e+13 Max. : 3.201e+13 Max. : 3.327e+13
## NA's :8016 NA's :9494 NA's :9455
## 2003 2004 2005
## Min. :-9.800e+02 Min. :-1.034e+03 Min. :-1.028e+03
## 1st Qu.: 5.000e+00 1st Qu.: 5.000e+00 1st Qu.: 8.000e+00
## Median : 1.020e+02 Median : 1.000e+02 Median : 1.000e+02
## Mean : 3.501e+10 Mean : 3.812e+10 Mean : 3.471e+10
## 3rd Qu.: 8.840e+03 3rd Qu.: 6.497e+03 3rd Qu.: 6.348e+03
## Max. : 3.744e+13 Max. : 4.219e+13 Max. : 4.562e+13
## NA's :9469 NA's :9287 NA's :8428
## 2006 2007 2008
## Min. :-1.015e+03 Min. :-1.013e+03 Min. :-1.007e+03
## 1st Qu.: 5.000e+00 1st Qu.: 5.000e+00 1st Qu.: 6.000e+00
## Median : 1.000e+02 Median : 1.010e+02 Median : 9.900e+01
## Mean : 4.585e+10 Mean : 5.255e+10 Mean : 5.415e+10
## 3rd Qu.: 7.716e+03 3rd Qu.: 9.454e+03 3rd Qu.: 5.548e+03
## Max. : 4.945e+13 Max. : 5.580e+13 Max. : 6.126e+13
## NA's :9276 NA's :9264 NA's :8909
## 2009 2010 2011
## Min. :-9.900e+02 Min. :-3.200e+01 Mode:logical
## 1st Qu.: 3.000e+00 1st Qu.: 3.000e+00 TRUE:466
## Median : 9.200e+01 Median : 1.030e+02 NA's:13046
## Mean : 6.287e+10 Mean : 1.104e+11
## 3rd Qu.: 6.840e+03 3rd Qu.: 4.902e+06
## Max. : 5.808e+13 Max. : 6.304e+13
## NA's :9751 NA's :11180
mydata<-filter(data,`Series code` =="IE.PPI.ENGY.CD" |`Series code` == "IE.PPI.TELE.CD"|`Series code` =="IE.PPI.TRAN.CD" |`Series code` =="IE.PPI.WATR.CD"|`Series code` =="EN.ATM.CO2E.PC"|`Series code` =="EN.ATM.CO2E.PP.GD.KD"|`Series code` =="EN.ATM.CO2E.KT")
names(mydata)[1]<-"countrycode"
names(mydata)[2]<-"countryname"
names(mydata)[3]<-"seriescode"
names(mydata)[4]<-"seriesname"
mydata<-mydata %>% as.data.frame() %>% melt(measure.var=7:28)
names(mydata)[7]<-"year"
summary(mydata)
## countrycode countryname seriescode
## Length:35882 Length:35882 Length:35882
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## seriesname SCALE Decimals year
## Length:35882 Min. :0 Min. :0.0000 1990 : 1631
## Class :character 1st Qu.:0 1st Qu.:0.0000 1991 : 1631
## Mode :character Median :0 Median :0.0000 1992 : 1631
## Mean :0 Mean :0.4286 1993 : 1631
## 3rd Qu.:0 3rd Qu.:1.0000 1994 : 1631
## Max. :0 Max. :1.0000 1995 : 1631
## (Other):26096
## value
## Min. :-3.000e+03
## 1st Qu.: 8.000e+00
## Median : 4.630e+02
## Mean : 4.726e+08
## 3rd Qu.: 2.167e+05
## Max. : 7.715e+10
## NA's :20700
names(mydata)
## [1] "countrycode" "countryname" "seriescode" "seriesname" "SCALE"
## [6] "Decimals" "year" "value"
spreadtest<-tidyr::unite(mydata,"series",c("seriesname","seriescode"),sep="",remove=TRUE,na.rm=FALSE)
spreadtest$SCALE=NULL
spreadtest$Decimals=NULL
spreadtest<-tidyr::spread(spreadtest,series,value)
names(spreadtest)[4]<-"CO2percapita"
names(spreadtest)[5]<-"CO2perGDP"
names(spreadtest)[6]<-"CO2total"
names(spreadtest)[7]<-"investenergy"
names(spreadtest)[8]<-"investtelecom"
names(spreadtest)[9]<-"investtransport"
names(spreadtest)[10]<-"investwater"
selected<-filter(spreadtest,countrycode=="ARG"|countrycode=="BRA"|countrycode=="ARG"|countrycode=="CHL"|countrycode=="CHN"|countrycode=="COL"|countrycode=="IDN"|countrycode=="IND"|
countrycode=="MEX"|countrycode=="MYS"|countrycode=="PER"|countrycode=="PHL"|countrycode=="RUS"|countrycode=="THA")
table1<-xtabs(~CO2total+countryname,data=spreadtest)
table1<-aggregate(CO2total~countryname,rm.na=TRUE,data=spreadtest,FUN = sum)
table2<-xtabs(~CO2percapita+countryname,data=spreadtest)
table2<-aggregate(CO2percapita~countryname,rm.na=TRUE,data=spreadtest,FUN=sum)
table3<-xtabs(~CO2perGDP+countryname,data=spreadtest)
table3<-aggregate(CO2perGDP~countryname,rm.na=TRUE,data=spreadtest,FUN = sum)
ggplot(spreadtest,aes(x=countryname,y=CO2total,color=year))+geom_point()+geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(spreadtest,aes(x=countryname,y=CO2percapita,color=year))+geom_point()+geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(selected,aes(x=countryname,y=CO2perGDP,color=year))+geom_point()+geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(spreadtest,aes(x=year,y=CO2total))+geom_point()+geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(spreadtest,aes(x=year,y=CO2percapita))+geom_point()+geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(selected,aes(x=as.numeric(as.character(year)),y=CO2perGDP,color=countryname))+geom_point()+geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
plot_ly(selected,x=~as.numeric(as.character(year)),y=~CO2perGDP,type='scatter',mode='lines')
plot_ly(selected,x=~as.numeric(as.character(year)),y=~CO2total,type='scatter',mode='lines')
plot_ly(selected,x=~as.numeric(as.character(year)),y=~CO2percapita,type='scatter',mode='lines')
3.2. Top emission from country (CO2 emission per GDP)
clean<-na.omit(spreadtest)
topemission<-arrange(table3,CO2perGDP)
ggplot(head(topemission,n=10),aes(countryname,CO2perGDP,fill=countryname,label=paste(CO2perGDP)))+geom_bar(sta='identity',color='black')+
coord_flip()+theme_bw()+theme(legend.position = 'none')+geom_text(aes(y=1),hjust=0,vjust=0.5,size=4)+
labs(title='Top countries with highest CO2 emission per GDP')
3.3 Comparing Investment in Different Segments with CO2 Emission for Each Selected Country
ARG<-filter(selected,countrycode=="ARG")
plot_ly(ARG,x=~ARG$year,y=~ARG$CO2percapita,name="CO2percapita",type='scatter',mode='lines') %>%
add_trace(y=~scale(ARG$investenergy),name='energy',mode="lines+markers") %>%
add_trace(y=~scale(ARG$investtelecom),name='telecom',mode='lines+markers') %>%
add_trace(y=~scale(ARG$investtransport),name='transport',mode='lines+markers')
3.4 Use the 13 Countries as a Sample to Test if Investment in Different Segments would Impact the Amount of CO2 Emission
sample<-na.omit(selected[3:10])
trace_0<-aggregate(sample$CO2percapita,by=list(year=sample$year),FUN=sum)
trace_1<-aggregate(sample$investenergy,by=list(year=sample$year),FUN=sum)
trace_2<-aggregate(sample$investtelecom,by=list(year=sample$year),FUN=sum)
trace_3<-aggregate(sample$investtransport,by=list(year=sample$year),FUN=sum)
plot_ly(sample,x=~trace_0$year,y=~trace_0$x,name="CO2percapita",type='scatter',mode='lines') %>%
add_trace(y=~scale(trace_1$x),name='energy',mode='lines+markers') %>%
add_trace(y=~scale(trace_2$x),name='telecom',mode='lines+markers') %>%
add_trace(y=~scale(trace_3$x),name='transport',mode='lines+markers')